#include "webPage.h"   

// Single SplitTool instance shared by every WebPage, created once so that
// jieba is not initialized repeatedly (its initialization is very slow).
SplitTool* WebPage::_spl = new SplitTool();
// Builds a page from its formatted text, which is stored in _content.
// processDoc() is deliberately not invoked here (noted by the author as too
// inefficient), so it has to be called separately.
WebPage::WebPage(string& content)
    : _content(content)
{
}

// Returns a mutable reference to this page's word -> frequency map.
// The map is filled by processDoc().
map<string,int>& WebPage::getWordsMap()
{
    return this->_wordsMap;
}

// Returns a mutable reference to this page's word list.
// The list is filled by processDoc().
vector<string>& WebPage::getWords()
{
    return this->_words;
}

// Processes one whole line of formatted web page data (_content): splits it
// into words with the shared SplitTool and rebuilds this page's word list and
// word-frequency map. Safe to call more than once; each call recomputes both
// containers from scratch.
void WebPage::processDoc()
{
    // All words of the document, as returned by the tokenizer.
    _words = _spl->cut(_content);

    // _words was just replaced, so drop counts left over from any earlier
    // call; otherwise every frequency would be added on top of the old one.
    _wordsMap.clear();

    // Record every word of this page together with its frequency.
    for (const auto& word : _words)
    {
        ++_wordsMap[word];
    }
}
